Polyphénol - part 1: créer un tableau bilan

1/ Introduction

This file is a supplementary data attached with the publication.
The only goal is to create a summary file that gives the quantity of phenol compounds observed for each genotype. The data are organized in 2 years (2013 and 2014)

This RMD file allows to: - charge every file - compute blup of each individuals - compute heritability for each trait - merge all experiments together

We need a few libraries

library(xtable)
library(gdata)
library(lme4)
library(RColorBrewer)
library(plotly)
library(FactoMineR)
my_colors=brewer.pal(8, "Set2") 

Let’s upload every file

# Watch out, to reproduct analysis, you have to update the path.
#my_path="../../DATA/PHENOTYPE"
#setwd(my_path)
# Metabolomique
DATA13=read.xls("../../DATA/PHENOTYPE/Metabolomique_Data.xlsx",  sheet = 1, header = TRUE) 
DATA14=read.xls("../../DATA/PHENOTYPE/Metabolomique_Data.xlsx",  sheet = 2, header = TRUE) 
# All individuals that exist for this pop:
my_indiv=read.table("../../DATA/PHENOTYPE/all_dic2_Silur.txt", header=F)[,1]

2/ Useful Functions

A function to calculate heritability:

calculate_herit=function(my_col, my_data){
  
  aov<- lmer ( my_data[,my_col] ~ (1|geno)  , data=my_data)
  VL<-as.numeric(VarCorr(aov)$geno)
  VRes<-as.numeric(attr(VarCorr(aov),"sc"))^2
  hdeux=round(VL/(VL + VRes) , 6)
  return(hdeux)
}

A function that return BLUPs of individuals.

calculate_blup=function(my_col, my_data){
  aov<- lmer ( my_data[,my_col] ~ (1|geno) , data=my_data)
  blup <- ranef(aov, condVar = TRUE)
  tmp=data.frame(rownames(blup$geno), blup$geno[,1])
  colnames(tmp)[2]=paste( colnames(my_data)[my_col], "blup", sep="_")
  return(tmp)
  }

3/ Year 2013

Clean the dataset

# Rename columns:
DATA13$geno=as.factor(gsub("TT06DC", "TT06DC.", DATA13$geno))

How many different individuals in this files? (counting dic2 and silur)

nlevels(DATA13$geno)
## [1] 166

Always 3 reps / genotypes? –> Yes indeed !

table(DATA13$geno)[table(DATA13$geno)!=3]
## named integer(0)

How many variables?

ncol(DATA13)-2
## [1] 58

Let’s calculate and print heritabilities (with and without inoc date effect):

# Calcul héritabilité:
herit_DATA13=matrix(0,1,ncol(DATA13)-2)
colnames(herit_DATA13)=colnames(DATA13)[3:length(colnames(DATA13))]
num=0
for(i in c(3:ncol(DATA13))){
  num=num+1
  a=calculate_herit(i, DATA13)
  herit_DATA13[1 ,num]=c(a)
  }
Display these heritabilities in a table:
X1_280 X1d_280 X2_280 X3_280 X4_280 X6_280 X6d_280 X7d_280 X7_280 X1d_320 X2d_320 X3d_320 X3_320 X4d_320 p.COUM X6_320 FERULIQ X9_320 X5d_320 X10_320 X1d_360 X2d_360 X1_360 X2_360 X3d_360 X4d_360 X3_360 X5d_360 X6d_360 Orient X7d_360 HOMOO X8d_360 ISOVIT X9d_360 X10d_360 X11d_360 X8_360 X10_360 X11_360 X14_360 X15_360 X16_360 X17_360 X18_360 X22_360 X23_360 X24_360 X25_360 X28_360 X29_360 X30_360 X31_360 X32_360 X33_360 X34_360 X35_360 X36_360
0.92 0.95 0.91 0.96 0.94 0.97 0.98 0.95 0.98 0.95 0.96 0.85 0.95 0.95 0.96 0.97 0.91 0.94 0.94 0.93 0.98 0.98 0.98 0.98 0.96 0.97 0.99 0.97 0.99 0.99 0.90 0.98 0.96 0.98 0.98 0.98 0.98 0.94 0.98 0.98 0.93 0.96 0.98 0.97 0.91 0.99 0.98 0.99 0.98 0.98 0.98 0.99 0.99 0.99 0.99 0.96 0.99 0.99

And show a borplot with these heritabilities:

plot_ly(x=colnames(herit_DATA13), y=herit_DATA13[1,], type="bar")%>%
        layout(
          yaxis = list(title = 'Heritability'),
          xaxis = list(tickfont = list(size=2))
        )
## Warning in arrange_impl(.data, dots): '.Random.seed' n'est pas un vecteur
## d'entiers, mais est de type 'NULL', et sera donc ignoré

Heritabilities are incredibly high. That means repetitions are strongly correlated one each other. So we’are going to compute the blups of genotypes and use only these blups for the QTLs.

Now we calculate the BLUP of every genotype for every variable and add it to a summary table in which we will add the 2 years:

FINAL=my_indiv
for(i in c(3:ncol(DATA13))){
  tmp=calculate_blup(i,  DATA13)
  FINAL=merge(FINAL , tmp, by.x=1, by.y=1, all.x=T)
}
colnames(FINAL)=gsub("blup","blup13", colnames(FINAL))

4/ Year 2014

Clean the dataset

# Rename columns:
DATA14$geno=gsub("Dic2", "dic2", DATA14$geno)
DATA14$geno=gsub("Silur", "silur", DATA14$geno)
DATA14$geno=gsub(" L3", "", DATA14$geno)
DATA14$geno=as.factor(gsub("TT06DC ", "TT06DC.", DATA14$geno))
DATA14=DATA14[ , -3]

How many different individuals in this files? (counting dic2 and silur)

nlevels(DATA14$geno)
## [1] 86

Always 3 reps / genotypes? –> Yes indeed !

table(DATA14$geno)[table(DATA14$geno)!=3]
## named integer(0)

How many variables?

ncol(DATA14)-2
## [1] 58

Let’s calculate and print heritabilities (with and without inoc date effect):

# Calcul héritabilité:
herit_DATA14=matrix(0,1,ncol(DATA14)-2)
colnames(herit_DATA14)=colnames(DATA14)[3:length(colnames(DATA14))]
num=0
for(i in c(3:ncol(DATA14))){
  num=num+1
  a=calculate_herit(i, DATA14)
  herit_DATA14[1 ,num]=c(a)
  }
Display these heritabilities in a table:
X1_280 X1d_280 X2_280 X3_280 X4_280 X6_280 X6d_280 X7d_280 X7_280 X1d_320 X2d_320 X3d_320 X3_320 X4d_320 p.COUM X6_320 FERULIQ X9_320 X5d_320 X10_320 X1d_360 X2d_360 X1_360 X2_360 X3d_360 X4d_360 X3_360 X5d_360 X6d_360 Orient X7d_360 HOMOO X8d_360 ISOVIT X9d_360 X10d_360 X11d_360 X8_360 X10_360 X11_360 X14_360 X15_360 X16_360 X17_360 X18_360 X22_360 X23_360 X24_360 X25_360 X28_360 X29_360 X30_360 X31_360 X32_360 X33_360 X34_360 X35_360 X36_360
0.88 0.99 0.94 0.91 0.94 0.99 0.98 0.96 0.96 1.00 0.99 0.98 0.96 0.97 0.96 0.99 0.95 0.97 0.99 0.98 0.99 0.99 0.98 0.98 0.99 0.95 0.99 0.99 0.98 0.97 0.98 0.99 0.99 0.98 0.99 0.99 0.98 0.93 0.98 0.99 0.96 0.94 0.98 0.97 0.96 0.98 0.97 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.93 0.99 0.99

And show a borplot with these heritabilities:

plot_ly(x=colnames(herit_DATA14), y=herit_DATA14[1,], type="bar")%>%
        layout(
          yaxis = list(title = 'Heritability'),
          xaxis = list(tickfont = list(size=2))
        )

Heritabilities are incredibly high. That means repetitions are strongly correlated one each other. So we’are going to compute the blups of genotypes and use only these blups for the QTLs.

Now we calculate the BLUP of every genotype for every variable and add it to a summary table in which we will add the 2 years:

for(i in c(3:ncol(DATA14))){
  tmp=calculate_blup(i,  DATA14)
  FINAL=merge(FINAL , tmp, by.x=1, by.y=1, all.x=T)
}
colnames(FINAL)=gsub("blup13","yoyoyo", colnames(FINAL))
colnames(FINAL)=gsub("blup","blup14", colnames(FINAL))
colnames(FINAL)=gsub("yoyoyo","blup13", colnames(FINAL))
colnames(FINAL)[1]="geno"

Let’s write this table for the qTL step.

write.table(FINAL, file="../../DATA/PHENOTYPE/phenotypage_all_metabolomique.csv", quote=F, row.names=F, col.names=T, sep=";")

Nombre de variable final??

dim(FINAL)
## [1] 197 117

J’ai bien les 0 dans les noms?

FINAL$geno
##   [1] TT06DC.38.03 TT06DC.38.07 TT06DC.38.08 TT06DC.38.09 TT06DC.38.11
##   [6] TT06DC.38.12 TT06DC.38.13 TT06DC.38.14 TT06DC.38.15 TT06DC.38.19
##  [11] TT06DC.38.22 TT06DC.38.23 TT06DC.38.24 TT06DC.38.25 TT06DC.38.27
##  [16] TT06DC.38.29 TT06DC.38.31 TT06DC.38.32 TT06DC.38.34 TT06DC.38.35
##  [21] TT06DC.38.36 TT06DC.38.37 TT06DC.38.38 TT06DC.38.39 TT06DC.38.40
##  [26] TT06DC.38.41 TT06DC.38.42 TT06DC.38.44 TT06DC.38.47 TT06DC.38.48
##  [31] TT06DC.38.49 TT06DC.38.50 TT06DC.38.51 TT06DC.38.52 TT06DC.38.53
##  [36] TT06DC.38.54 TT06DC.39.03 TT06DC.39.04 TT06DC.39.06 TT06DC.39.07
##  [41] TT06DC.39.08 TT06DC.39.09 TT06DC.39.10 TT06DC.39.11 TT06DC.39.12
##  [46] TT06DC.39.13 TT06DC.39.16 TT06DC.39.17 TT06DC.39.18 TT06DC.39.19
##  [51] TT06DC.39.20 TT06DC.39.21 TT06DC.39.22 TT06DC.39.23 TT06DC.39.24
##  [56] TT06DC.39.25 TT06DC.39.26 TT06DC.39.27 TT06DC.39.28 TT06DC.39.29
##  [61] TT06DC.39.30 TT06DC.39.31 TT06DC.39.32 TT06DC.39.34 TT06DC.39.35
##  [66] TT06DC.39.36 TT06DC.39.37 TT06DC.39.38 TT06DC.40.01 TT06DC.40.04
##  [71] TT06DC.40.05 TT06DC.40.06 TT06DC.40.07 TT06DC.40.08 TT06DC.40.09
##  [76] TT06DC.40.10 TT06DC.40.11 TT06DC.40.12 TT06DC.40.14 TT06DC.40.15
##  [81] TT06DC.40.16 TT06DC.40.17 TT06DC.40.18 TT06DC.40.22 TT06DC.40.23
##  [86] TT06DC.40.24 TT06DC.40.27 TT06DC.40.29 TT06DC.40.31 TT06DC.40.32
##  [91] TT06DC.40.33 TT06DC.40.34 TT06DC.40.35 TT06DC.40.37 TT06DC.40.38
##  [96] TT06DC.40.39 TT06DC.40.40 TT06DC.40.41 TT06DC.40.45 TT06DC.40.46
## [101] TT06DC.40.47 TT06DC.40.49 TT06DC.40.52 TT06DC.40.53 TT06DC.42.01
## [106] TT06DC.42.02 TT06DC.42.03 TT06DC.42.04 TT06DC.42.05 TT06DC.42.06
## [111] TT06DC.42.07 TT06DC.42.09 TT06DC.42.11 TT06DC.42.13 TT06DC.42.14
## [116] TT06DC.42.15 TT06DC.42.16 TT06DC.42.17 TT06DC.42.18 TT06DC.42.19
## [121] TT06DC.42.20 TT06DC.42.21 TT06DC.42.22 TT06DC.42.23 TT06DC.42.24
## [126] TT06DC.42.25 TT06DC.42.26 TT06DC.42.27 TT06DC.42.28 TT06DC.42.29
## [131] TT06DC.42.30 TT06DC.42.31 TT06DC.42.32 TT06DC.42.33 TT06DC.42.34
## [136] TT06DC.42.35 TT06DC.42.36 TT06DC.42.37 TT06DC.42.38 TT06DC.42.40
## [141] TT06DC.42.41 TT06DC.42.42 TT06DC.42.43 TT06DC.42.44 TT06DC.42.45
## [146] TT06DC.42.46 TT06DC.42.47 TT06DC.42.48 TT06DC.42.49 TT06DC.42.50
## [151] TT06DC.42.51 TT06DC.42.52 TT06DC.42.53 TT06DC.42.54 TT06DC.42.55
## [156] TT06DC.42.56 TT06DC.44.03 TT06DC.44.04 TT06DC.44.05 TT06DC.44.07
## [161] TT06DC.44.09 TT06DC.44.10 TT06DC.44.11 TT06DC.44.12 TT06DC.44.13
## [166] TT06DC.44.14 TT06DC.44.18 TT06DC.44.19 TT06DC.44.22 TT06DC.44.23
## [171] TT06DC.44.24 TT06DC.44.28 TT06DC.44.29 TT06DC.44.31 TT06DC.44.33
## [176] TT06DC.44.34 TT06DC.44.35 TT06DC.44.36 TT06DC.44.38 TT06DC.44.39
## [181] TT06DC.44.40 TT06DC.44.41 TT06DC.44.42 TT06DC.44.43 TT06DC.44.44
## [186] TT06DC.44.45 TT06DC.44.47 TT06DC.44.48 TT06DC.44.50 TT06DC.44.52
## [191] TT06DC.44.54 TT06DC.44.55 TT06DC.44.56 TT06DC.44.61 TT06DC.44.62
## [196] TT06DC.44.64 TT06DC.44.65
## 197 Levels: TT06DC.38.03 TT06DC.38.07 TT06DC.38.08 ... TT06DC.44.65

Duplicated genotyped?

table(FINAL$geno)[table(FINAL$geno)>1]
## named integer(0)

Yan Holtz

October 2016